In [18]:
const _ = require('lodash');
const regeneratorRuntime = require('regenerator-runtime-only')
// generate denormalized data
// Candidate values for each dimension of a generated record; datumGen picks
// one value per key with _.sample.
const levels = {
year : ['2015', '2016'],
month : ['Jan', 'Feb'],
// 'Ch. 1' .. 'Ch. 20' (_.range excludes the upper bound 21)
chapter : _.range(1, 21).map(i => `Ch. ${i}`),
volume : ['Vol. I', 'Vol. II', 'Vol. III'],
name : ['Book A', 'Book B', 'Book C']
};
// Zero-argument generators, one per measure; each call returns a fresh random
// integer from _.random with the given bounds.
const measures = {
wordCount : () => _.random(1000, 2500),
userRating: () => _.random(1, 5)
}
// Build one random denormalized record: every key of `levels` receives a value
// sampled from its candidate list, every key of `measures` receives the result
// of calling its generator, and both sets of keys end up on the same object.
const datumGen = () => {
  const dimensions = _.mapValues(levels, (choices) => _.sample(choices));
  const facts = _.mapValues(measures, (generate) => generate());
  // `dimensions` is freshly created above, so extending it in place is safe.
  return Object.assign(dimensions, facts);
};
datumGen()
Out[18]:
In [19]:
// Synthesize 1000 random records to serve as the raw input rows.
const rawData = _.range(1000).map(() => datumGen());
// Keep the first 4 rows as a small sample for the step-by-step cells.
const sample = rawData.slice(0, 4);
sample
Out[19]:
In [51]:
// Lazily applies `mapper` to every element of `rows` and flattens the results:
// `mapper(row)` must return an iterable, whose items are yielded one by one
// before moving on to the next row.
function* runMapper(rows, mapper) {
  for (const record of rows) {
    const emitted = mapper(record);
    yield* emitted;
  }
}
// Curried mapper factory: pivotMapper(rowKeys)(colKeys)(measureKeys) returns a
// generator function over a single datum `d`.
// For each prefix length 1..rowKeys.length it yields one flat array made of
//   - the row values, with every position past the prefix replaced by null,
//   - all column values,
//   - all measure values.
// Nothing is yielded when `rowKeys` is empty.
const pivotMapper = (rowKeys) => (colKeys) => (measureKeys) =>
  function* (d) {
    const pick = (keys) => keys.map((key) => d[key]);
    const rowVals = pick(rowKeys);
    const tail = [...pick(colKeys), ...pick(measureKeys)];
    for (let depth = 1; depth <= rowVals.length; depth += 1) {
      // Keep the first `depth` row values; null out the deeper levels.
      const prefix = rowVals.map((value, i) => (i < depth ? value : null));
      yield [...prefix, ...tail];
    }
  };
// Demo on the 4-row sample: two row fields ('name', 'volume'), no column
// fields, one measure field ('wordCount'). runMapper returns a generator, so
// no row is mapped until mapperTest is iterated.
const mapperTest = runMapper(sample, pivotMapper(['name', 'volume'])([])(['wordCount']))
mapperTest
Out[51]:
In [52]:
// Builds a sort comparator that orders two records by the given keys, in
// priority order. For each key: strictly equal values move on to the next key,
// a null value sorts before any non-null value, and otherwise `>` / `<`
// decide. Values that are neither equal nor ordered by `>` / `<` are treated
// as a tie on that key. Returns 0 when every key ties.
function keyBasedComparator(keys) {
  const compareField = (left, right) => {
    if (left === right) return 0;
    if (left === null) return -1;
    if (right === null) return 1;
    if (left > right) return 1;
    if (left < right) return -1;
    return 0;
  };
  return (a, b) => {
    for (const key of keys) {
      const order = compareField(a[key], b[key]);
      if (order !== 0) return order;
    }
    return 0;
  };
}
// Materializes any iterable into an array. An input that is already an array
// is returned as-is (same instance, no copy).
function iterToArray(it) {
  return Array.isArray(it) ? it : [...it];
}
// Returns the rows of `output` as a new array sorted by `keys`, using the
// ordering defined by keyBasedComparator.
//   output - any iterable of rows (array, generator, ...)
//   keys   - property names / indices to sort by, in priority order
// The input is always copied before sorting: Array.prototype.sort works in
// place, so sorting the caller's own array would silently reorder it.
function sortMapperOutput(output, keys) {
  const rows = [...output];
  rows.sort(keyBasedComparator(keys));
  return rows;
}
// Sort the sample mapper output on columns 0 and 1 (the two row-field
// positions produced by the mapper demo).
const sampleSortKeys = [0, 1];
// Drain the mapper generator into an array so it can be sorted.
const mapperTestOutput = iterToArray(mapperTest);
const sortedMapperOutput = sortMapperOutput(mapperTestOutput, sampleSortKeys);
sortedMapperOutput
Out[52]:
In [49]:
// Splits an iterable that is already sorted by `keys` into runs of consecutive
// items that compare equal on those keys (per keyBasedComparator), yielding
// each run as an array. Yields nothing for an empty input.
//   iterable - items in sorted order; equal-key items must be adjacent
//   keys     - property names / indices that define a group
function* groupBySorted(iterable, keys) {
  const compare = keyBasedComparator(keys);
  let buffer = [];
  for (const x of iterable) {
    // "Is there a previous item?" is answered by the buffer itself rather than
    // by a null/falsy sentinel, so falsy items (0, '', ...) are still compared.
    const startsNewGroup =
      buffer.length > 0 && compare(x, buffer[buffer.length - 1]) !== 0;
    if (startsNewGroup) {
      yield buffer;
      buffer = [];
    }
    buffer.push(x);
  }
  if (buffer.length > 0) yield buffer;
}
// Group the sorted sample rows on the same keys they were sorted by, and
// materialize the generator so the groups can be inspected and reused.
const groupedData = iterToArray(
groupBySorted(sortedMapperOutput, sampleSortKeys)
)
groupedData
Out[49]:
In [50]:
// For every group (array) produced by `iterable`, folds the group with
// Array.prototype.reduce using reducer `fn` and initial value `seed`, and
// yields the folded result. The same `seed` value is passed for every group.
function* reduceWithKey(iterable, fn, seed) {
  const fold = (rows) => rows.reduce(fn, seed);
  for (const rows of iterable) {
    yield fold(rows);
  }
}
// Reducer for rows shaped [...key, value]: takes the running total from the
// last slot of `acc`, adds the last slot of `nextRow`, and returns a new row
// made of nextRow's leading (key) part followed by the new total.
function aggregateWordCount(acc, nextRow) {
  const runningTotal = _.last(acc);
  const rowValue = _.last(nextRow);
  const rowKey = _.initial(nextRow);
  return [...rowKey, runningTotal + rowValue];
}
// Fold each sample group into one row. The seed has two null key slots and a
// starting total of 0, matching the [name, volume, wordCount] row layout.
const pivotResults = reduceWithKey(groupedData, aggregateWordCount, [null, null, 0]);
// reduceWithKey is a generator; materialize it to display the rows.
iterToArray(
pivotResults
)
Out[50]:
In [72]:
// Pivot configuration for the full data set.
const rowFields = ["name", "volume"];
const colFields = ["year", "month"];
// Names of the measure fields to carry through the pipeline.
const measures = ["wordCount", "userRating"];
// Map step over the full raw data (lazy: runMapper returns a generator).
const mapperStep =
runMapper(
rawData,
pivotMapper(rowFields)(colFields)(measures)
);
// sort and group by row and col fields
// Mapped rows are laid out [...rowFields, ...colFields, ...measures], so the
// key columns are the first rowFields.length + colFields.length positions.
const sortKeys = _.range(rowFields.length + colFields.length);
// Positions of the row-field columns (not referenced in the rest of this cell).
const rowFieldsKeys = _.range(rowFields.length);
const sortStep = sortMapperOutput(mapperStep, sortKeys);
const groupStep = groupBySorted(sortStep, sortKeys);
Out[72]:
In [73]:
// Number of leading key columns (row fields followed by column fields) in
// every mapped row.
const keyLen = rowFields.length + colFields.length;
// Reducer seed: one null per key column, then the starting word-count total
// and the starting userRating accumulator. The null padding is derived from
// keyLen so the seed keeps the right shape when rowFields / colFields change
// length (it used to be hard-coded to four key columns).
const empty = [...new Array(keyLen).fill(null), 0, { sum: 0, n: 0 }]; // reducer seed
// Reducer over rows shaped [...key (keyLen columns), wordCount, userRating].
// Word counts are summed. User ratings are accumulated as { sum, n } so the
// average can be computed once the whole group has been folded.
// Returns a new row carrying nextRow's key columns; `acc` is not modified.
function aggregate(acc, nextRow) {
  const [accWordCount, accUserRating] = acc.slice(keyLen);
  const [nextWordCount, nextUserRating] = nextRow.slice(keyLen);
  return [
    ...nextRow.slice(0, keyLen),
    accWordCount + nextWordCount,
    { sum: accUserRating.sum + nextUserRating, n: accUserRating.n + 1 },
  ];
}
// Fold every (row keys, column keys) group into a single aggregated row.
// reduceWithKey is a generator, so nothing is computed until `results` is
// iterated.
const results = reduceWithKey(groupStep, aggregate, empty);
Out[73]:
In [75]:
// Finalize the aggregated rows: keep the key columns and the summed word
// count, and collapse the { sum, n } rating accumulator into its average.
const pivoted = iterToArray(results).map((row) => {
  const [wordCount, userRating] = row.slice(keyLen);
  const averageRating = userRating.sum / userRating.n;
  return [...row.slice(0, keyLen), wordCount, averageRating];
});
Out[75]:
Stitch together the API response.
In [79]:
// Response payload in "long" form: one header per column and one row per
// (row keys, column keys) combination.
const data = {
  headers: [].concat(rowFields, colFields, measures),
  values: pivoted,
};
data
Out[79]:
In [97]:
// Column positions inside each data.values row, which is laid out as
// [...rowFields, ...colFields, ...measures].
const rowIndices = _.range(rowFields.length);
const colIndices = _.range(
  rowFields.length,
  rowFields.length + colFields.length
);
const measureIndices = _.range(
  rowFields.length + colFields.length,
  rowFields.length + colFields.length + measures.length
);
// For every column field, the distinct values that occur in the data, in
// order of first appearance.
const uniqColValues = colIndices.map((columnIndex) =>
  _.uniqBy(data.values, (row) => row[columnIndex]).map((row) => row[columnIndex])
);
uniqColValues
Out[97]:
In [103]:
// Cartesian product of any number of arrays.
// Returns an array of tuples, one element taken from each collection in
// argument order; the first collection varies slowest and the last fastest.
//   cartesianProduct(['a', 'b'], [1, 2]) -> [['a',1], ['a',2], ['b',1], ['b',2]]
// Edge cases:
//   - no collections at all -> [[]] (the single empty tuple) instead of
//     throwing, so a pivot with zero column fields still works
//   - any empty collection  -> []
const cartesianProduct = (...collections) =>
  collections.reduce(
    // Extend every tuple built so far with every value of the next collection.
    (tuples, vals) =>
      tuples.reduce((acc, t) => acc.concat(vals.map(v => [...t, v])), []),
    [[]]
  );
// Every combination of the distinct column-field values, one tuple per
// combination.
const uniqColCombinations = cartesianProduct(...uniqColValues);
uniqColCombinations
Out[103]:
In [118]:
// Builds a Map from an iterable of [key, value] pairs. When a key occurs more
// than once, the value from the last occurrence is kept.
const makeMapFromPairs = (pairs) => {
  const lookup = new Map();
  for (const [key, value] of pairs) {
    lookup.set(key, value);
  }
  return lookup;
};
// Each column combination occupies a contiguous block of nMeasures columns in
// the wide pivot row.
const nMeasures = measures.length;
const nPivotColumns = nMeasures * uniqColCombinations.length;
// Map from a column combination (its values joined with ",") to the starting
// offset of that combination's block: 0, nMeasures, 2 * nMeasures, ...
const pivotIndicesMapping = makeMapFromPairs(
_.zip(
uniqColCombinations.map(comb => comb.join(",")),
_.range(0, uniqColCombinations.length*nMeasures, nMeasures)
)
)
pivotIndicesMapping
Out[118]:
In [119]:
// Returns the elements of `arr` found at the given `indices`, in the order the
// indices are listed.
const takeFromIndices = (arr, indices) => indices.map((index) => arr[index]);
// Reducer that collects all rows sharing the same row key into one
// [rowKey, Map] pair, where the Map goes from the comma-joined column values
// to the row's measure values.
// `hashMap` is the accumulator: falsy on the first call (a new Map is
// created), afterwards the [rowKey, Map] pair returned by the previous call,
// whose Map is reused and extended in place.
const mergeByRow = (hashMap, nextRow) => {
  const cells = hashMap ? hashMap[1] : new Map();
  const pick = (indices) => takeFromIndices(nextRow, indices);
  cells.set(pick(colIndices).join(","), pick(measureIndices));
  return [pick(rowIndices), cells];
};
// Group the aggregated rows by their row-field columns only, then fold each
// group into a [rowKey, Map(colKey -> measure values)] pair.
const groupByRow = groupBySorted(data.values, _.range(rowFields.length));
const tallyed = iterToArray(reduceWithKey(groupByRow, mergeByRow, null));
// Print every pair on its own line for inspection.
for (const entry of tallyed) {
  console.log(entry);
}
""
Out[119]:
In [134]:
// Spread every [rowKey, Map] pair into one wide row: the row key followed by
// nPivotColumns cells. The measure values of each column combination are
// written into that combination's block, located via pivotIndicesMapping.
// Cells of combinations the row has no data for stay unset.
const pivotTable = tallyed.map(([rowKey, valueMap]) => {
  const pivotRow = new Array(nPivotColumns);
  valueMap.forEach((mVals, colKey) => {
    const offset = pivotIndicesMapping.get(colKey);
    for (let j = 0; j < nMeasures; j += 1) {
      pivotRow[offset + j] = mVals[j];
    }
  });
  return [...rowKey, ...pivotRow];
});
// One header per pivot column: the column combination followed by the measure
// name, in the same order as the blocks laid out in the pivot rows.
const pivotHeaders = _.flatMap(uniqColCombinations, (comb) =>
  measures.map((measureName) => comb.concat([measureName]))
);
// Wide-form payload: row-field names first, then one (array) header per pivot
// column.
const betterData = {
  headers: rowFields.concat(pivotHeaders),
  values: pivotTable,
};
betterData
Out[134]:
In [171]:
// A simple keyed tree: every node carries `nodeData` (null when absent) and
// `childNodes`, a plain object mapping child key -> Tree.
class Tree {
  // nodeData - payload for this node; undefined is stored as null.
  // children - object of child key -> Tree; defaults to an empty object.
  // When `nodeData` is itself a Tree, that instance is returned unchanged, so
  // `new Tree(x)` can be used to coerce "Tree or undefined" into a Tree.
  constructor(nodeData, children) {
    if (nodeData instanceof Tree) {
      return nodeData;
    }
    this.nodeData = nodeData === undefined ? null : nodeData;
    this.childNodes = children || {};
  }

  // True when the node has no children.
  isLeaf() {
    return Object.keys(this.childNodes).length === 0;
  }

  // Returns a new Tree combining this tree with `otherTree`; neither operand
  // is modified. Children present under the same key are merged recursively.
  // otherTree's nodeData wins unless it is null/undefined, so falsy payloads
  // such as 0 or '' are preserved rather than being treated as missing.
  merge(otherTree) {
    const nodeData =
      otherTree.nodeData != null ? otherTree.nodeData : this.nodeData;
    const childKeys = new Set([
      ...Object.keys(this.childNodes),
      ...Object.keys(otherTree.childNodes),
    ]);
    const children = {};
    for (const key of childKeys) {
      // new Tree(undefined) yields an empty node for a side lacking this key.
      const mine = new Tree(this.childNodes[key]);
      const theirs = new Tree(otherTree.childNodes[key]);
      children[key] = mine.merge(theirs);
    }
    return new Tree(nodeData, children);
  }

  // Builds a single-branch tree from a path of keys, with `leafData` stored on
  // the deepest node.
  //   Tree.fromPath(['a', 'b'], 1) -> root { a: { b: leaf(nodeData 1) } }
  // null/undefined path parts are skipped (they mark rolled-up levels); other
  // falsy parts such as 0 or '' are real keys and are kept.
  // An empty or all-null path returns a lone node holding `leafData`, so the
  // result can always be merged.
  // Note: this takes two arguments, so passing it directly to Array#map will
  // supply the element index as `leafData`.
  static fromPath(pathArray, leafData) {
    return pathArray.reduceRight(
      (child, pathPart) =>
        pathPart == null ? child : new Tree(null, { [pathPart]: child }),
      new Tree(leafData)
    );
  }
}
null
Out[171]:
In [172]:
// Merge one single-branch tree per column combination into a column-header
// tree and pretty-print it.
// NOTE(review): Array#map hands (element, index, array) to its callback, so
// each combination's index ends up as the leafData argument of Tree.fromPath.
// That is spelled out here; confirm the index is the intended leaf payload.
console.log(JSON.stringify(
  uniqColCombinations
    .map((path, index) => Tree.fromPath(path, index))
    .reduce((merged, tree) => merged.merge(tree), new Tree()),
  null,
  2
))
Out[172]:
The same approach can be used to build a tree out of the row hierarchy:
In [175]:
// Row-key part (the first rowFields.length columns) of every aggregated row.
const pivotRowTreeValues = data.values.map((row) => row.slice(0, rowFields.length));
// Merge one single-branch tree per row path and pretty-print the result.
// NOTE(review): Array#map hands (element, index, array) to its callback, so
// each row's index ends up as the leafData argument of Tree.fromPath. That is
// spelled out here; confirm the index is the intended leaf payload.
console.log(JSON.stringify(
  pivotRowTreeValues
    .map((path, index) => Tree.fromPath(path, index))
    .reduce((merged, tree) => merged.merge(tree), new Tree()),
  null,
  2
));
Out[175]: